This document is a lab-book for visual data mining carried out on the CHUSJ dataset.
# Read dataset from SPSS file
BD_chusj <- haven::read_sav(file.path("data", "BD_chusj_MAIN.sav"))
BD_chusj <- BD_chusj %>% zap_formats %>% zap_label %>% zap_widths
hist(BD_chusj$Age, col = "azure3",
main = "Histogram of Age",
xlab = "Age" )
# Add boxplot
par(new = TRUE)
boxplot(BD_chusj$Age,
horizontal = TRUE,
axes = FALSE,
boxwex = 0.25,
col = rgb(0.81, 0.85, 0.9, alpha = 0.5))
summary(BD_chusj$Age) %>% print()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 55.00 65.00 62.81 80.00 95.00
Variable Date Of First Positive Lab Result presented with different granularities: Daily, Weekly and Monthly.
FirstDate <-
BD_chusj %>%
select(DateOfFirstPositiveLabResult) %>%
group_by(date = DateOfFirstPositiveLabResult) %>%
summarise(freq = n())
FirstDate_daily <- xts::as.xts(FirstDate$freq, order.by = as.Date(FirstDate$date))
FirstDate_weekly <- xts::apply.weekly(FirstDate_daily, sum)
FirstDate_monthly <- xts::apply.monthly(FirstDate_daily, sum)
par(mfrow = c(3,1))
plot(FirstDate_daily, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - daily")
plot(FirstDate_weekly, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - weekly")
plot(FirstDate_monthly, col = "cadetblue4", lwd = 3,
main = "Date of first positive lab result - monhly")
par(mfrow = c(1,1))
# Clear aux variables
rm(list = ls()[grep("^FirstDate", ls())])
Count of comorbidities (total n = 2688 patient).
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% as.data.frame()
comorbidities <- data.frame(comorbidities = rownames(comorbidities),
count = comorbidities[,1])
ggplot(data = comorbidities, aes(x = reorder(comorbidities, count), y = count)) +
geom_bar(stat = 'identity', color = "cadetblue4", fill = "azure3", width = 0.85) +
geom_text(aes(label = count), hjust = -0.3, color = "slategray4", size = 3.5) +
coord_flip() +
ggtitle(label = "Comorbidities",
subtitle = "Frequency in 2688 patients") +
labs(x = "", y = "")
rm(comorbidities)
Relative frequency of comorbidities among patient (total n = 2688 patients)
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% t() %>% as.data.frame()
comorbidities <- comorbidities / nrow(BD_chusj) # convert to percentage
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
value = comorbidities)
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = rgb(0.2, 0.5, 0.5, 0.9), # line color
pfcol = rgb(0.2, 0.5, 0.5, 0.2), # fill-in color
plwd = 2, # line width
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.9, # font size of net labels
vlcex = 0.8, # font size of outer labels
title = "Overall frequency of comorbidities (%)")
# Frequency table
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% as.data.frame()
names(comorbidities) <- "frequency"
comorbidities$percentage <- round(comorbidities$frequency / nrow(BD_chusj), 3) # convert to percentage
comorbidities %>% arrange(desc(percentage)) %>% rmarkdown::paged_table()
Count of ATC groups per Comorbidity (total n = 2688 patients). ATC groups are presented in decreasing order of frequency.
#' ATCs and Comorbidities
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
comorbidities <- BD_chusj[,c(7:18, 23)]
ATCs_comorbidities <- cbind(ATC_lv1, comorbidities)
# Create an empty dataset
ATC.Comorbs_matrix <- data.frame(matrix(NA,
ncol = ncol(comorbidities),
nrow = ncol(ATC_lv1)))
colnames(ATC.Comorbs_matrix) <- names(comorbidities)
rownames(ATC.Comorbs_matrix) <- names(ATC_lv1)
# Compute matrix
for (atc in rownames(ATC.Comorbs_matrix)) {
for (comorb in colnames(ATC.Comorbs_matrix)) {
ATC.Comorbs_matrix[atc, comorb] <-
ATCs_comorbidities[ATCs_comorbidities[,atc] == 1 & ATCs_comorbidities[,comorb] == 1,
c(atc, comorb)] %>% nrow()
}
}
rm(atc, comorb, ATCs_comorbidities, comorbidities)
# Check Top-N ATC families
ATCs <- sapply(ATC_lv1, sum) %>% as.data.frame() %>% arrange(desc(.))
# Select Top-n ATC families
#ATCs <- ATCs %>% head(4)
# Prepare data for radar chart
radar.data <- ATC.Comorbs_matrix[rownames(ATCs), ]
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(radar.data), 50, f = ceiling)
radar.data <- rbind(max = rep(max.axis, ncol(radar.data)),
min = rep(0, ncol(radar.data)),
radar.data)
# Define colors
# colors <- RColorBrewer::brewer.pal(nrow(radar.data) - 2, "Set1")
colors <- c("firebrick3", "steelblue2", "springgreen3",
"darkorchid3", "darkorange2", "gold2",
"tomato4", "chartreuse4", "cyan3",
"wheat3", "deeppink2", "khaki2",
"royalblue2", "coral1")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(radar.data,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = seq(0, max.axis, length.out = 5),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of top-4 ATC groups per Comorbidity (n)")
legend(x = -1.7,
y = 1,
legend = rownames(radar.data[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
# Frequency table
ATC.Comorbs_matrix[rownames(ATCs), ] %>% rmarkdown::paged_table()
Here, the frequencies are presented relatively to each comorbidity - ie. percentage of patients with Cancer using drugs of group ATC.B.
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Version 2 - with relative frequencies (%)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- sapply(comorbidities, sum) %>% t() %>% as.data.frame()
ATC.Comorbs_matrix.t <- ATC.Comorbs_matrix %>% t() %>% as.data.frame()
ATC.Comorbs_matrix.perc <- sapply(ATC.Comorbs_matrix.t, function(i){round(i / comorbidities[1,], 3)})
# Prepare data for radar chart
radar.data <- matrix(unlist(ATC.Comorbs_matrix.perc),
ncol = length(comorbidities),
nrow = length(ATC_lv1),
byrow = TRUE) %>% as.data.frame()
colnames(radar.data) = names(comorbidities)
rownames(radar.data) = names(ATC_lv1)
radar.data <- rbind(max = rep(1, ncol(radar.data)),
min = rep(0, ncol(radar.data)),
radar.data[rownames(ATCs),])
# Define colors
# colors <- RColorBrewer::brewer.pal(nrow(radar.data) - 2, "Set1")
colors <- c("firebrick3", "steelblue2", "springgreen3",
"darkorchid3", "darkorange2", "gold2",
"tomato4", "chartreuse4", "cyan3",
"wheat3", "deeppink2", "khaki2",
"royalblue2", "coral1")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(radar.data,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
# caxislabels = seq(0, max.axis, length.out = 5),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of ATC per Comorbidity (n)")
legend(x = -1.7, #1.1,
y = 1, #-0.65,
legend = rownames(radar.data[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
# Frequency table - Absolute frequencies
radar.data[-c(1,2),] %>% rmarkdown::paged_table()
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(Sex = BD_chusj$Gender,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
male = comorbidities %>% filter(Sex == 1) %>% sapply(., sum) %>% t() %>% as.data.frame(),
female = comorbidities %>% filter(Sex == 0) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$Sex <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("dodgerblue3", "hotpink1")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.15), scales::alpha(colors[2], 0.15))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities per Sex (%)")
legend(x = 1.1,
y = -1,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% rmarkdown::paged_table()
}
## Frequencies in percentage (%):
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(IntensiveCare = BD_chusj$IntensiveCare,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
IC_no = comorbidities %>% filter(IntensiveCare == 0) %>% sapply(., sum) %>% t() %>% as.data.frame(),
IC_yes = comorbidities %>% filter(IntensiveCare == 1) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$IntensiveCare <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# comorbidities %>% t() %>% round(., digits = 3) %>% as.data.frame() %>% print()
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("seagreen4", "indianred3")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.3), scales::alpha(colors[2], 0.6))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities in Intensive Care(%)")
legend(x = 1.1,
y = -1,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% rmarkdown::paged_table()
}
## Frequencies in percentage (%):
comorbidities <- BD_chusj[,c(7:18, 23)]
comorbidities <- cbind(Outcome = BD_chusj$Outcome,
comorbidities)
comorbidities <- rbind(
# total = comorbidities %>% sapply(., sum) %>% t() %>% as.data.frame(),
Recovered = comorbidities %>% filter(Outcome == 0) %>% sapply(., sum) %>% t() %>% as.data.frame(),
Died = comorbidities %>% filter(Outcome == 1) %>% sapply(., sum) %>% t() %>% as.data.frame(),
OtherHospital = comorbidities %>% filter(Outcome == 2) %>% sapply(., sum) %>% t() %>% as.data.frame())
comorbidities$Outcome <- NULL
# convert to percentage
comorbidities <- comorbidities / nrow(BD_chusj)
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(comorbidities), 0.1, f = ceiling)
# Prepare data for radar chart
comorbidities <- rbind(max = rep(max.axis, length(comorbidities)),
min = rep(0, length(comorbidities)),
comorbidities)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("forestgreen", "firebrick3", "dodgerblue3")
colors_border <- colors
colors_in <- c(scales::alpha(colors[1], 0.15), scales::alpha(colors[2], 0.15))
radarchart(comorbidities,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Frequency of Comorbidities per Hospital discharge (%)")
legend(x = 1.1,
y = -0.8,
legend = rownames(comorbidities[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
{
cat("Frequencies in percentage (%):\n")
(comorbidities[-c(1:2),] * 100) %>% round(., digits = 1) %>%
t() %>% as.data.frame() %>% rmarkdown::paged_table()
}
## Frequencies in percentage (%):
Here we compare 2 groups of patients: those in Intensive Care (IC_yes, n=858), and those not in Intensive Care (IC_no, n=1830). For each of those subgroups, we analyse the usage of medication per ATC drug family.
It can be noted, per example, that within patients in IC (red line), 93.7% are medicated with drugs of ATC group A. Comparatively, from those patients not at IC (green line), only 59.5% are medicated with the same ATC drug group.
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(IntensiveCare = BD_chusj$IntensiveCare,
ATCs.lv1)
# Count elements in each sub-group
{
cat("INTENSIVE CARE", "\n")
cat("Frequency count in each sub-group:\n")
factor(ATCs.lv1$IntensiveCare,
levels = attr(ATCs.lv1$IntensiveCare, "labels"),
labels = names(attr(ATCs.lv1$IntensiveCare, "labels"))) %>%
table() %>% print()
}
## INTENSIVE CARE
## Frequency count in each sub-group:
## .
## N Y
## 1830 858
IC_yes_count <- ATCs.lv1$IntensiveCare[ATCs.lv1$IntensiveCare == 1] %>% length()
IC_no_count <- ATCs.lv1$IntensiveCare[ATCs.lv1$IntensiveCare == 0] %>% length()
# Calculate the percentage of patients in and out of IC for each ATC group
ATCs.lv1 <- rbind(
#'--- Intensive Care == YES -----
IC_yes = ATCs.lv1 %>%
filter(IntensiveCare == 1) %>%
{sapply(., sum) / IC_yes_count} %>%
round(digits=3) %>% t() %>% as.data.frame(),
#'--- Intensive Care == NO ------
IC_no = ATCs.lv1 %>%
filter(IntensiveCare == 0) %>%
{sapply(., sum) / IC_no_count} %>%
round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$IntensiveCare <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
# colors <- RColorBrewer::brewer.pal(3, "BuPu")
colors <- c("indianred2", "forestgreen")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients\n in and out Intensive Care",
sub = paste(" Patients in Intensive Care :", IC_yes_count, "\n",
"Patients not in Intensive Care :", IC_no_count, "\n")
)
legend(x = 1.1,
y = -1,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
Relative use of medication in patients in and out Intensive Care (in percentage)
{ATCs.lv1[c("IC_no", "IC_yes"),] * 100} %>% t() %>% round(., digits = 1) %>%
as.data.frame() %>% rmarkdown::paged_table()
Here we compare 3 groups of patients regarding their Hospital discharge: those who have recovered (Recovered, n=1828), those who have been transferred to other hospitals (OtherHospital, n=290), and those who have died (Died, n=555). For each of those subgroups, we analyse the usage of medication per ATC drug family.
It can be noted, per example, that within patients who have recovered (green line), 32.1% are medicated with drugs of ATC group C. Comparatively, from those patients who have been moved to another hospital (blue line), 53.8% are medicated with the same ATC drug group, whereas from those who have died (red line) 65.0 were using such medication.
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(Outcome = BD_chusj$Outcome,
ATCs.lv1)
# Count elements in each sub-group
{
cat("HOSPITAL DISCHARGE", "\n")
cat("Frequency count in each sub-group:\n")
factor(ATCs.lv1$Outcome,
levels = attr(ATCs.lv1$Outcome, "labels"),
labels = names(attr(ATCs.lv1$Outcome, "labels"))) %>%
table() %>% print()
}
## HOSPITAL DISCHARGE
## Frequency count in each sub-group:
## .
## Recovered Died Other Hospital
## 1828 555 290
Outcome_Recovered_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 0] %>% na.omit() %>% length()
Outcome_Died_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 1] %>% na.omit() %>% length()
Outcome_OtherHospital_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 2] %>% na.omit() %>% length()
# Calculate the percentage of patients according to Hospital discharge for each ATC group
ATCs.lv1 <- rbind(
Died = ATCs.lv1 %>% filter(Outcome == 1) %>% {sapply(., sum) / Outcome_Died_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
OtherHospital = ATCs.lv1 %>% filter(Outcome == 2) %>% {sapply(., sum) / Outcome_OtherHospital_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
Recovered = ATCs.lv1 %>% filter(Outcome == 0) %>% {sapply(., sum) / Outcome_Recovered_count} %>% round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$Outcome <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
colors <- c("firebrick3", "dodgerblue3", "forestgreen")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients\n based on Hospital discharge",
sub = paste(" Patients Deceased :", Outcome_Died_count, "\n",
"Patients Transfered to Other Hospitals :", Outcome_OtherHospital_count, "\n",
"Patients Recovered :", Outcome_Recovered_count, "\n")
)
legend(x = 1.1,
y = -0.9,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
Relative use of medication in patients based on Hospital discharge (in percentage)
{ATCs.lv1[c("Recovered", "OtherHospital","Died"),] * 100} %>% t() %>% round(., digits = 1) %>%
as.data.frame() %>% rmarkdown::paged_table()
Here we compare 4 groups of patients regarding their Mechanical Respiratory and Circulatory Support: ECMO, HFO, NIV, and IMV.
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(BD_chusj[25:28],
ATCs.lv1)
# BD_chusj[25:28] %>% glimpse()
#' ECMO - Extra Corporeal Membrane Oxygenation
#' HFO - High Flow Oxygen
#' - High‐Frequency Oscillatory ventilation
#' NIV - Non Invasive Ventilation
#' IMV - Invasive Mechanical Ventilation
#' - Intermittent Mandatory Ventilation
# Count elements in each sub-group
{
cat("MECHANICAL RESPIRATORY AND CIRCULATORY SUPPORT", "\n")
cat("Frequency count in each sub-group:\n")
lapply(ATCs.lv1[1:4], function(l){
factor(l, levels = attr(l, "labels"), labels = names(attr(l, "labels"))) %>% table()
})
}
## MECHANICAL RESPIRATORY AND CIRCULATORY SUPPORT
## Frequency count in each sub-group:
## $ECMO
## .
## N Y
## 2609 79
##
## $HFO
## .
## N Y
## 2304 384
##
## $NIV
## .
## N Y
## 2391 297
##
## $IMV
## .
## N Y
## 2325 363
Support_ECMO_count <- ATCs.lv1$ECMO[ATCs.lv1$ECMO == 1] %>% na.omit() %>% length()
Support_HFO_count <- ATCs.lv1$HFO[ATCs.lv1$HFO == 1] %>% na.omit() %>% length()
Support_NIV_count <- ATCs.lv1$NIV[ATCs.lv1$NIV == 1] %>% na.omit() %>% length()
Support_IMV_count <- ATCs.lv1$IMV[ATCs.lv1$IMV == 1] %>% na.omit() %>% length()
# Calculate the percentage of patients in and out of IC for each ATC group
ATCs.lv1 <- rbind(
ECMO = ATCs.lv1 %>% filter(ECMO == 1) %>% {sapply(., sum) / Support_ECMO_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
HFO = ATCs.lv1 %>% filter(HFO == 1) %>% {sapply(., sum) / Support_HFO_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
NIV = ATCs.lv1 %>% filter(NIV == 1) %>% {sapply(., sum) / Support_NIV_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
IMV = ATCs.lv1 %>% filter(IMV == 1) %>% {sapply(., sum) / Support_IMV_count} %>% round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$ECMO <- ATCs.lv1$HFO <- ATCs.lv1$NIV <- ATCs.lv1$IMV <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
# colors <- RColorBrewer::brewer.pal(nrow(ATCs.lv1) - 2, "Set1")
colors <- c("firebrick2", "dodgerblue2", "forestgreen", "darkorange")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.1)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients based on\n Mechanical Respiratory and Circulatory Support",
sub = paste0("Patients using support:",
" ECMO=", Support_ECMO_count,
"; HFO=", Support_HFO_count,
"; NIV=", Support_NIV_count,
"; IMV=", Support_IMV_count
)
)
legend(x = 1.1,
y = -0.7,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
Relative use of medication in patients Mechanical Respiratory and Circulatory Support (in percentage)
{
{ATCs.lv1[c("ECMO", "HFO", "NIV", "IMV"),] * 100} %>% t() %>% round(., digits = 1) %>%
as.data.frame() %>% print
}
## ECMO HFO NIV IMV
## ATC.A 100.0 96.4 97.3 99.2
## ATC.B 100.0 100.0 100.0 99.7
## ATC.C 96.2 71.9 83.5 99.2
## ATC.D 8.9 9.4 9.4 11.0
## ATC.G 5.1 0.5 0.3 1.1
## ATC.H 83.5 78.6 74.4 77.7
## ATC.J 97.5 76.3 81.1 96.1
## ATC.L 10.1 4.9 5.7 4.4
## ATC.M 89.9 52.1 58.6 91.5
## ATC.N 100.0 85.9 89.2 100.0
## ATC.P 0.0 1.3 2.0 2.8
## ATC.R 48.1 70.8 70.0 59.0
## ATC.S 16.5 5.7 6.1 11.0
## ATC.V 0.0 1.8 2.4 3.9
# Define multiframe
par(mfrow = c(1,3))
legend.x = -1.3
legend.y = -0.9
# Plot1: ATCs VS Intensive Care --------------------------------------------------
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(IntensiveCare = BD_chusj$IntensiveCare,
ATCs.lv1)
IC_yes_count <- ATCs.lv1$IntensiveCare[ATCs.lv1$IntensiveCare == 1] %>% length()
IC_no_count <- ATCs.lv1$IntensiveCare[ATCs.lv1$IntensiveCare == 0] %>% length()
# Calculate the percentage of patients in and out of IC for each ATC group
ATCs.lv1 <- rbind(
# Intensive Care == YES
IC_yes = ATCs.lv1 %>%
filter(IntensiveCare == 1) %>%
{sapply(., sum) / IC_yes_count} %>%
round(digits=3) %>% t() %>% as.data.frame(),
# Intensive Care == NO
IC_no = ATCs.lv1 %>%
filter(IntensiveCare == 0) %>%
{sapply(., sum) / IC_no_count} %>%
round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$IntensiveCare <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
colors <- c("indianred2", "forestgreen")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients\n in and out Intensive Care",
sub = paste(" Patients in Intensive Care :", IC_yes_count, "\n",
"Patients not in Intensive Care :", IC_no_count, "\n")
)
legend(x = legend.x,
y = legend.y,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
text(-1.2, 1.2, "(A)", cex=1.4, pos=1, col="black")
# Plot2: ATCs vs Outcome ---------------------------------------------------------
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(Outcome = BD_chusj$Outcome,
ATCs.lv1)
Outcome_Recovered_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 0] %>% na.omit() %>% length()
Outcome_Died_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 1] %>% na.omit() %>% length()
Outcome_OtherHospital_count <- ATCs.lv1$Outcome[ATCs.lv1$Outcome == 2] %>% na.omit() %>% length()
# Calculate the percentage of patients in and out of IC for each ATC group
ATCs.lv1 <- rbind(
Died = ATCs.lv1 %>% filter(Outcome == 1) %>% {sapply(., sum) / Outcome_Died_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
OtherHospital = ATCs.lv1 %>% filter(Outcome == 2) %>% {sapply(., sum) / Outcome_OtherHospital_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
Recovered = ATCs.lv1 %>% filter(Outcome == 0) %>% {sapply(., sum) / Outcome_Recovered_count} %>% round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$Outcome <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
colors <- c("firebrick3", "dodgerblue3", "forestgreen")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.2)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients\n based on Outcome",
sub = paste(" Patients Deceased :", Outcome_Died_count, "\n",
"Patients Transfered to Other Hospitals :", Outcome_OtherHospital_count, "\n",
"Patients Recovered :", Outcome_Recovered_count, "\n")
)
legend(x = legend.x,
y = legend.y,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
text(-1.2, 1.2, "(B)", cex=1.4, pos=1, col="black")
# Plot3: ATCs vs Respiratory Support ---------------------------------------------
ATCs.lv1 <- BD_chusj[, c(469, 378:390)]
ATCs.lv1 <- cbind(BD_chusj[25:28],
ATCs.lv1)
Support_ECMO_count <- ATCs.lv1$ECMO[ATCs.lv1$ECMO == 1] %>% na.omit() %>% length()
Support_HFO_count <- ATCs.lv1$HFO[ATCs.lv1$HFO == 1] %>% na.omit() %>% length()
Support_NIV_count <- ATCs.lv1$NIV[ATCs.lv1$NIV == 1] %>% na.omit() %>% length()
Support_IMV_count <- ATCs.lv1$IMV[ATCs.lv1$IMV == 1] %>% na.omit() %>% length()
# Calculate the percentage of patients in and out of IC for each ATC group
ATCs.lv1 <- rbind(
ECMO = ATCs.lv1 %>% filter(ECMO == 1) %>% {sapply(., sum) / Support_ECMO_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
HFO = ATCs.lv1 %>% filter(HFO == 1) %>% {sapply(., sum) / Support_HFO_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
NIV = ATCs.lv1 %>% filter(NIV == 1) %>% {sapply(., sum) / Support_NIV_count} %>% round(digits=3) %>% t() %>% as.data.frame(),
IMV = ATCs.lv1 %>% filter(IMV == 1) %>% {sapply(., sum) / Support_IMV_count} %>% round(digits=3) %>% t() %>% as.data.frame()
)
ATCs.lv1$ECMO <- ATCs.lv1$HFO <- ATCs.lv1$NIV <- ATCs.lv1$IMV <- NULL
# Find max and round it (ceiling) to the nearest .1 multiple
max.axis <- plyr::round_any(max(ATCs.lv1), 0.1, f = ceiling)
# Prepare data for radar chart
ATCs.lv1 <- rbind(max = rep(max.axis, length(ATCs.lv1)),
min = rep(0, length(ATCs.lv1)),
ATCs.lv1)
# Define colors
colors <- c("firebrick2", "dodgerblue2", "forestgreen", "darkorange")
colors_border <- colors
colors_in <- scales::alpha(colors, alpha = 0.1)
radarchart(ATCs.lv1,
axistype = 1,
# custom polygon
pcol = colors_border, # line color
pfcol = colors_in, # fill-in color
plwd = 2, # line width
plty = 1, # line type
# custom the grid
cglcol = "grey20", # net color
cglty = 3, # net line type
axislabcol = "grey50", # net label color
cglwd = 0.8, # net width
caxislabels = scales::percent(seq(0, max.axis, length.out = 5)),
# custom labels
calcex = 0.8,
vlcex = 0.7,
title = "Relative use of medication in patients based on\n Mechanical Respiratory and Circulatory Support",
sub = paste0("Patients using support:",
" ECMO=", Support_ECMO_count,
";\nHFO=", Support_HFO_count,
"; NIV=", Support_NIV_count,
"; IMV=", Support_IMV_count
)
)
legend(x = legend.x,
y = legend.y,
legend = rownames(ATCs.lv1[-c(1,2),]),
bty = "n",
pch = 20,
col = colors_border,
text.col = "grey40",
cex = 0.9,
pt.cex = 2)
text(-1.2, 1.2, "(C)", cex=1.4, pos=1, col="black")
# Clear temp variables
rm(ATCs.lv1, max.axis)
rm(list = ls()[grep("^colors", ls())])
rm(list = ls()[grep("^IC_", ls())])
rm(list = ls()[grep("^Outcome_", ls())])
rm(list = ls()[grep("^Support_", ls())])
# Clear multiframe
par(mfrow = c(1,1))
#' ATC codes (first level) are listed in variables "469 + 378-390"
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA, ncol = length(ATC_lv1), nrow = length(ATC_lv1)))
colnames(heatmap.matrix) <- rownames(heatmap.matrix) <- names(ATC_lv1)
# Fill in dataset with count of co-occurences
for (i in 1:length(ATC_lv1)) {
for (j in i:length(ATC_lv1)) {
heatmap.matrix[i,j] <- heatmap.matrix[j,i] <- nrow(ATC_lv1[,c(i,j)][ATC_lv1[,i]==1 & ATC_lv1[,j]==1,])
}
}; rm(i, j)
# Reshape data matrix to unrolled data, to feed heatmap
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# Change co-occurance counts to to percentages
heatmap.data$value <- round(heatmap.data$value / nrow(ATC_lv1), digits = 3)
# Plot graphic
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 90),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = "Relative co-frequencies in 1st-level ATC drug groups",
subtitle = "Expressed as % of total patients (n = 2688 patients)") +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
# scale_fill_gradient(low = "lightsteelblue1", high = "darkolivegreen3") +
scale_fill_gradient(low = "#F9FFFD", high = "#0C8A00") +
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 20,
title = "%"))
Each cell in this map presents the relative co-frequency of patients using 2 drug groups. That is, cell “ATC.A x ATC.C”, showing 0.360, indicates that 36% of the totality of patients (n=2688) uses simultaneously drugs of ATC groups A and C.
The diagonal line - eg. “ATC.C x ATC.C”, showing 0.412 - can be seen as a relative frequency of the use of that ATC drug group alone. In this case, 41.2% of the totality of patients in the dataset (n=2688) are medicated with drugs from ATC group C.
As main findings:
ATC.B is the single most prevalent ATC drug family, being used by 89.1% of patients.
ATC.A + ATC.B is the most prevalent drug family dual association, being jointly used by 66.6% of patients.
groups <- colnames(heatmap.matrix)
group.combinations <- combn(groups, 2) %>% as.data.frame()
cofrequencies <-
sapply(1:ncol(group.combinations),
FUN = function(n){
heatmap.data %>%
filter(x == group.combinations[1,n],
y == group.combinations[2,n]) %>%
select(value)
})
group.combinations <-
cbind(t(group.combinations), cofrequencies) %>%
as.data.frame() %>%
lapply(., as.character) %>%
as.data.frame()
group.combinations$cofrequencies <- as.numeric(group.combinations$cofrequencies)
group.combinations %>% arrange(desc(cofrequencies)) %>% head(10) %>% print
## V1 V2 cofrequencies
## 1 ATC.A ATC.B 0.666
## 2 ATC.B ATC.N 0.556
## 3 ATC.B ATC.J 0.524
## 4 ATC.A ATC.N 0.507
## 5 ATC.B ATC.R 0.495
## 6 ATC.A ATC.J 0.464
## 7 ATC.J ATC.N 0.440
## 8 ATC.B ATC.C 0.400
## 9 ATC.A ATC.R 0.396
## 10 ATC.A ATC.C 0.360
The most frequent dual combinations are the following, all of which occurring in more than 50% of patients:
ATC.A + ATC.B : 66,6 %
ATC.B + ATC.N : 55,6 %
ATC.B + ATC.J : 52,4 %
ATC.A + ATC.N : 50,7 %
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
ATC_lv1 <- cbind(IntensiveCare = BD_chusj$IntensiveCare,
ATC_lv1)
# Filter only those patients in Intensive Care
ATC_lv1 <- ATC_lv1 %>% filter(IntensiveCare == 1)
ATC_lv1$IntensiveCare <- NULL
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA, ncol = length(ATC_lv1), nrow = length(ATC_lv1)))
colnames(heatmap.matrix) <- rownames(heatmap.matrix) <- names(ATC_lv1)
for (i in 1:length(ATC_lv1)) {
for (j in i:length(ATC_lv1)) {
heatmap.matrix[i,j] <- heatmap.matrix[j,i] <- nrow(ATC_lv1[,c(i,j)][ATC_lv1[,i]==1 & ATC_lv1[,j]==1,])
}
}; rm(i, j)
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# change values to percentage
heatmap.data$value <- round(heatmap.data$value / nrow(ATC_lv1), digits = 3)
# Plot graphic
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 90),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = "Relative co-frequencies in 1st-level ATC drug groups",
subtitle = paste0("Expressed as % of patients in Intensive Care (n = ", nrow(ATC_lv1), " patients)")) +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
scale_fill_gradient(low = "#F9FFFD", high = "#a66e29") + #8068b0 #a66e29
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 20,
title = "%"))
# Clear temp variables
rm(ATC_lv1, heatmap.data, heatmap.matrix)
ATC_lv1 <- BD_chusj[, c(469, 378:390)]
ATC_lv1 <- cbind(Outcome = BD_chusj$Outcome,
ATC_lv1)
# Filter only those patients in Intensive Care
ATC_lv1 <- ATC_lv1 %>% filter(Outcome == 1)
ATC_lv1$Outcome <- NULL
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA, ncol = length(ATC_lv1), nrow = length(ATC_lv1)))
colnames(heatmap.matrix) <- rownames(heatmap.matrix) <- names(ATC_lv1)
for (i in 1:length(ATC_lv1)) {
for (j in i:length(ATC_lv1)) {
heatmap.matrix[i,j] <- heatmap.matrix[j,i] <- nrow(ATC_lv1[,c(i,j)][ATC_lv1[,i]==1 & ATC_lv1[,j]==1,])
}
}; rm(i, j)
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# change values to percentage
heatmap.data$value <- round(heatmap.data$value / nrow(ATC_lv1), digits = 3)
# Plot graphic
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 90),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = "Relative co-frequencies in 1st-level ATC drug groups",
subtitle = paste0("Expressed as % of deceased patients (n = ", nrow(ATC_lv1), " patients)")) +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
scale_fill_gradient(low = "#F9FFFD", high = "#cc3746") +
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 20,
title = "%"))
# Clear temp variables
rm(ATC_lv1, heatmap.data, heatmap.matrix)
heatmap_2groups <- function(index1 = NA,
index2 = NA,
verbose = TRUE) {
#' This function plots a heatmap of 2 ATC groups at 2nd level
#' inputs: the indexes on the 2nd levels codes for each group
#' ATC.A: 362:377 (16 codes)
#' ATC.B: 391:395 (5 codes)
#' ATC.J: 425:430 (6 codes)
#' ATC.N: 441:447 (7 codes)
ATC_group1 <- BD_chusj[, index1]
ATC_group2 <- BD_chusj[, index2]
# Create an empty dataset
heatmap.matrix <- data.frame(matrix(NA,
ncol = ncol(ATC_group1),
nrow = ncol(ATC_group2)))
colnames(heatmap.matrix) <- names(ATC_group1)
rownames(heatmap.matrix) <- names(ATC_group2)
ATC_groups12 <- cbind(ATC_group1, ATC_group2)
for (g1 in seq_along(ATC_group1)) {
for (g2 in seq_along(ATC_group2)) {
heatmap.matrix[g2, g1] <-
nrow(ATC_groups12[, c(g1, g2 + ncol(ATC_group1))][ATC_groups12[,g1]==1 &
ATC_groups12[,g2+ncol(ATC_group1)]==1,])
}
}
heatmap.data <- reshape2::melt(as.matrix(heatmap.matrix))
colnames(heatmap.data) <- c("x", "y", "value")
# change values to percentage
heatmap.data$value <- round(heatmap.data$value / nrow(BD_chusj), digits = 3)
family.group1 <- substring(names(ATC_group1)[1], 5, 5)
family.group2 <- substring(names(ATC_group2)[1], 5, 5)
p <-
ggplot(heatmap.data, aes(x, y, fill = value)) +
geom_tile(color = "grey95",
lwd = 1,
linetype = 1) +
theme(axis.text.x = element_text(angle = 0),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
ggtitle(label = paste("Relative co-frequences in 2nd-level ATC drug groups",
family.group1, "and", family.group2),
subtitle = "Expressed as % of total patients (n = 2688)") +
geom_text(aes(label = sprintf("%0.3f", value)), color = "gray30", size = 3) +
# scale_fill_gradient(low = "lightsteelblue1", high = "darkolivegreen3") +
scale_fill_gradient(low = "#F9FFFD", high = "#0C8A00") +
# coord_fixed() +
guides(fill = guide_colourbar(barwidth = 0.5,
barheight = 7,
title = "%"))
# Find most prevalent combinations
if (verbose) {
heatmap.data %>%
arrange(desc(value)) %>%
mutate(perc = round(value / nrow(BD_chusj), 3)) %>%
relocate(y) %>%
rename(!!family.group1 := y, !!family.group2 := x, freq = value) %>%
head() %>% print()
}
return(p)
}
heatmap_2groups(index1 = 362:377, index2 = 391:395) # A.B
## A B freq perc
## 1 ATC.A02 ATC.B01 0.333 0
## 2 ATC.A02 ATC.B05 0.316 0
## 3 ATC.A03 ATC.B01 0.310 0
## 4 ATC.A10 ATC.B01 0.302 0
## 5 ATC.A03 ATC.B05 0.286 0
## 6 ATC.A10 ATC.B05 0.255 0
heatmap_2groups(index1 = 391:395, index2 = 441:447) # B.N
## B N freq perc
## 1 ATC.B01 ATC.N02 0.459 0
## 2 ATC.B05 ATC.N02 0.415 0
## 3 ATC.B01 ATC.N05 0.247 0
## 4 ATC.B05 ATC.N05 0.239 0
## 5 ATC.B01 ATC.N01 0.179 0
## 6 ATC.B05 ATC.N01 0.166 0
heatmap_2groups(index1 = 391:395, index2 = 425:430) # B.J
## B J freq perc
## 1 ATC.B01 ATC.J01 0.481 0
## 2 ATC.B05 ATC.J01 0.413 0
## 3 ATC.B03 ATC.J01 0.076 0
## 4 ATC.B02 ATC.J01 0.035 0
## 5 ATC.B01 ATC.J05 0.026 0
## 6 ATC.B01 ATC.J02 0.025 0
heatmap_2groups(index1 = 362:377, index2 = 441:447) # A.N
## A N freq perc
## 1 ATC.A03 ATC.N02 0.309 0
## 2 ATC.A02 ATC.N02 0.301 0
## 3 ATC.A10 ATC.N02 0.217 0
## 4 ATC.A02 ATC.N05 0.184 0
## 5 ATC.A03 ATC.N05 0.180 0
## 6 ATC.A02 ATC.N01 0.157 0
# Plot all heatmaps together
p1 <- heatmap_2groups(index1 = 362:377, index2 = 391:395, verbose = F) # A.B
p2 <- heatmap_2groups(index1 = 391:395, index2 = 441:447, verbose = F) # B.N
p3 <- heatmap_2groups(index1 = 391:395, index2 = 425:430, verbose = F) # B.J
p4 <- heatmap_2groups(index1 = 362:377, index2 = 441:447, verbose = F) # A.N
cowplot::plot_grid(p1, p4, p2, p3,
ncol = 2,
nrow = 2,
rel_heights = c(2,1),
scale = 0.9,
labels=c("(A)", "(B)", "(C)", "(D)"),
label_size = 14
)